library(data.table)
library(tidyr)
library(maps)
library(haven)
library(ggplot2)
library(dplyr)
library(readxl)
library(ggrepel)
library(wordcloud)
# PREPARATION DATASETS FOR ANALYSIS #################### #################### ####################
# PREP THE DATASET FOR ANALYSIS WVS 5 & 6 ####################
# Read the Wave 5 data (RDS export of the official WVS file)
WV5_data <- readRDS("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/F00007944-WV5_Data_R_v20180912.rds")
# Convert the WV5_data object to a plain data.frame
WV5_data_df <- as.data.frame(WV5_data)
# Show the first rows — head() instead of dumping the entire data set,
# which is what the original bare `WV5_data_df` echo did
head(WV5_data_df)
# Rename the Wave-5 questionnaire items to descriptive variable names
WV5_data <- WV5_data_df %>%
  rename(
    gender = V235, age = V237, country_code = V2, wave = V1,
    risktaking = V86, children = V56, married = V55,
    employed = V241, education = V238
  )
head(WV5_data)
colnames(WV5_data)
# Pasted console output of colnames(WV5_data) — commented out so the script parses:
# [1] "wave" "V1A" "V1B" "country_code" "V2A" "V3" "V4"
# [8] "V4_CO" "V5" "V5_CO" "V6" "V6_CO" "V7" "V7_CO"
# [15] "V8" "V8_CO" "V9" "V9_CO" "V10" "V11" "V12"
# [22] "V13" "V14" "V15" "V16" "V17" "V18" "V19"
# [29] "V20" "V21" "V22" "V23" "V24" "V25" "V26"
# [36] "V27" "V28" "V29" "V30" "V31" "V32" "V33"
# [43] "V34" "V35" "V36" "V37" "V38" "V39" "V40"
# [50] "V41" "V42" "V43" "V43_01" "V43_02" "V43_03" "V43_04"
# [57] "V43_05" "V43_06" "V43_07" "V43_08" "V43_09" "V43_10" "V43_11"
# [64] "V43_12" "V43_13" "V43_14" "V43_15" "V43_16" "V43_17" "V43_18"
# [71] "V43_19" "V43_20" "V43_21" "V43_22" "V43_23" "V43_24" "V43_25"
# [78] "V43_26" "V43_27" "V43_28" "V43_29" "V43_30" "V44" "V45"
# [85] "V46" "V47" "V48" "V49" "V50" "V51" "V52"
# [92] "V53" "V54" "married" "children" "V57" "V58" "V59"
# [99] "V60" "V61" "V62" "V63" "V64" "V65" "V66"
# [106] "V67" "V68" "V69" "V69_HK" "V70" "V70_HK" "V71"
# [113] "V72" "V73" "V73_HK" "V74" "V74_HK" "V75" "V76"
# [120] "V77" "V78" "V79" "V80" "V81" "V82" "V83"
# [127] "V84" "V85" "risktaking" "V87" "V88" "V89" "V90"
# [134] "V91" "V92" "V93" "V94" "V95" "V96" "V97"
# [141] "V98" "V99" "V100" "V101" "V102" "V103" "V104"
# [148] "V105" "V106" "V107" "V108" "V109" "V110" "V111"
# [155] "V112" "V113" "V114" "V115" "V116" "V117" "V118"
# [162] "V119" "V120" "V121" "V122" "V123" "V124" "V125"
# [169] "V126" "V127" "V128" "V129" "V130" "V130_CA_1" "V130_IQ_1"
# [176] "V130_IQ_2" "V130_IQ_3" "V130_IQ_4" "V130_NZ_1" "V130_NZ_2" "V131" "V132"
# [183] "V133" "V134" "V135" "V136" "V137" "V138" "V139"
# [190] "V140" "V141" "V142" "V143" "V144" "V145" "V146_00"
# [197] "V146_01" "V146_02" "V146_03" "V146_04" "V146_05" "V146_06" "V146_07"
# [204] "V146_08" "V146_09" "V146_10" "V146_11" "V146_12" "V146_13" "V146_14"
# [211] "V146_15" "V146_16" "V146_17" "V146_18" "V146_19" "V146_20" "V146_21"
# [218] "V146_22" "V147" "V148" "V149" "V150" "V151" "V151_IQ_A"
# [225] "V151_IQ_B" "V152" "V153" "V154" "V155" "V156" "V157"
# [232] "V158" "V159" "V160" "V161" "V162" "V163" "V164"
# [239] "V165" "V166" "V167" "V168" "V169" "V170" "V171"
# [246] "V172" "V173" "V174" "V175" "V176" "V177" "V178"
# [253] "V179" "V180" "V181" "V182" "V183" "V184" "V185"
# [260] "V186" "V187" "V188" "V189" "V190" "V191" "V192"
# [267] "V193" "V194" "V195" "V196" "V197" "V198" "V199"
# [274] "V200" "V201" "V202" "V203" "V204" "V205" "V206"
# [281] "V207" "V208" "V209" "V210" "V211" "V212" "V213A"
# [288] "V213B" "V213C" "V213D" "V213E" "V213F" "V213G" "V213H"
# [295] "V213K" "V213L" "V213M" "V213N" "V214" "V215" "V216"
# [302] "V217" "V218" "V219" "V220" "V221" "V222" "V223"
# [309] "V224" "V225" "V226" "V227" "V228" "V229" "V230"
# [316] "V231" "V232" "V233" "V233A" "V234" "gender" "V236"
# [323] "age" "education" "V238CS" "V239" "V240" "employed" "V242"
# [330] "V242A_CO" "V243" "V244" "V245" "V246" "V247" "V248"
# [337] "V249" "V250" "V251" "V252" "V252B" "V253" "V253CS"
# [344] "V254" "V255" "V255CS" "V256" "V257" "V257B" "V257C"
# [351] "V258" "V259" "V259A" "V260" "V261" "V262" "V263"
# [358] "V264" "V265" "S024" "S025" "Y001" "Y002" "Y003"
# [365] "SACSECVAL" "SECVALWGT" "RESEMAVAL" "WEIGHTB" "I_AUTHORITY" "I_NATIONALISM" "I_DEVOUT"
# [372] "DEFIANCE" "WEIGHT1A" "I_RELIGIMP" "I_RELIGBEL" "I_RELIGPRAC" "DISBELIEF" "WEIGHT2A"
# [379] "I_NORM1" "I_NORM2" "I_NORM3" "RELATIVISM" "WEIGHT3A" "I_TRUSTARMY" "I_TRUSTPOLICE"
# [386] "I_TRUSTCOURTS" "SCEPTICISM" "WEIGHT4A" "I_INDEP" "I_IMAGIN" "I_NONOBED" "AUTONOMY"
# [393] "WEIGHT1B" "I_WOMJOB" "I_WOMPOL" "I_WOMEDU" "EQUALITY" "WEIGHT2B" "I_HOMOLIB"
# [400] "I_ABORTLIB" "I_DIVORLIB" "CHOICE" "WEIGHT3B" "I_VOICE1" "I_VOICE2" "I_VOI2_00"
# [407] "VOICE" "WEIGHT4B" "S001" "S007" "S018" "S019" "S021"
# [414] "COW"
# Keep only the variables needed for the analysis
WV5_data <- WV5_data %>%
dplyr::select(gender, age, country_code, wave, risktaking, children, employed, education, married)
WV5_data
# Lookup table mapping numeric WVS country codes to country names
countrynames <- read.csv("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt", header = FALSE, as.is = TRUE)
colnames(countrynames) <- c("code", "name")
# Decode country_code into a readable country name via positional match()
WV5_data$country <- countrynames$name[match(WV5_data$country_code, countrynames$code)]
# Check the frequency of each country in the new column
table(WV5_data$country)
# Pasted console output of table(WV5_data$country) — commented out so the script parses:
# Andorra 1003 Argentina 1002 Australia 1421 Brazil 1500 Bulgaria 1001 Burkina Faso 1534
# Canada 2164 Chile 1000 China 1991 Colombia 3025 Cyprus (G) 1050 Egypt 3051
# Ethiopia 1500 Finland 1014 France 1001 Georgia 1500 Germany 2064 Ghana 1534
# Great Britain 1041 Guatemala 1000 Hong Kong 1252 Hungary 1007 India 2001 Indonesia 2015
# Iran 2667 Iraq 2701 Italy 1012 Japan 1096 Jordan 1200 Malaysia 1201
# Mali 1534 Mexico 1560 Moldova 1046 Morocco 1200 Netherlands 1050 New Zealand 954
# Norway 1025 Peru 1500 Poland 1000 Romania 1776 Russia 2033 Rwanda 1507
# Slovenia 1037 South Africa 2988 South Korea 1200 Spain 1200 Sweden 1003 Switzerland 1241
# Taiwan 1227 Thailand 1534 Trinidad and Tobago 1002 Turkey 1346 Ukraine 1000 United States 1249
# Uruguay 1000 Viet Nam 1495 Zambia 1500
# Display the updated WV5_data
print(WV5_data)
# Read the Wave 6 data. NOTE: load() restores the object
# 'WV6_Data_R_v20201117' into the workspace and returns only its NAME as a
# string, so the object itself has to be assigned afterwards — the original
# `WV6_data <- load(...)` briefly held the name, not the data.
load("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/WV6_Data_R_v20201117.rdata")
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)
# Rename the Wave-6 questionnaire items to descriptive variable names
WV6_data <- WV6_data %>%
  rename(
    wave = V1, gender = V240, age = V242, country_code = V2,
    risktaking = V76, children = V58, married = V57,
    employed = V229, education = V248
  )
# Keep only the variables of interest
WV6_data <- WV6_data %>%
  dplyr::select(gender, age, country_code, wave, risktaking, children, employed, education, married)
WV6_data
# Decode the country codes for Wave 6 (same lookup table as Wave 5)
countrynames <- read.csv("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt", header = FALSE, as.is = TRUE)
colnames(countrynames) <- c("code", "name")
WV6_data$country <- countrynames$name[match(WV6_data$country_code, countrynames$code)]
table(WV6_data$country)
# Pasted console output of table(WV6_data$country) — commented out so the script parses:
# Algeria 1200 Argentina 1030 Armenia 1100 Australia 1477 Azerbaijan 1002 Belarus 1535
# Brazil 1486 Chile 1000 China 2300 Colombia 1512 Cyprus (G) 1000 Ecuador 1202
# Egypt 1523 Estonia 1533 Georgia 1202 Germany 2046 Ghana 1552 Haiti 1996
# Hong Kong 1000 India 4078 Iraq 1200 Japan 2443 Jordan 1200 Kazakhstan 1500
# Kuwait 1303 Kyrgyzstan 1500 Lebanon 1200 Libya 2131 Malaysia 1300 Mexico 2000
# Morocco 1200 Netherlands 1902 New Zealand 841 Nigeria 1759 Pakistan 1200 Palestine 1000
# Peru 1210 Philippines 1200 Poland 966 Qatar 1060 Romania 1503 Russia 2500
# Rwanda 1527 Singapore 1972 Slovenia 1069 South Africa 3531 South Korea 1200 Spain 1189
# Sweden 1206 Taiwan 1238 Thailand 1200 Trinidad and Tobago 999 Tunisia 1205 Turkey 1605
# Ukraine 1500 United States 2232 Uruguay 1000 Uzbekistan 1500 Yemen 1000 Zimbabwe 1500
WV6_data
# Combine the two waves into a single data set (identical column layout)
WV5_data
WV6_data
WVS_data <- rbind(WV5_data, WV6_data)
WVS_data
# Exclude participants with invalid codes (negative values encode
# missing/refused in the WVS); children == 0 is a valid answer
WVS_data <- subset(WVS_data, risktaking > 0 & gender > 0 & age > 0 & education > 0 & employed > 0 & married > 0 & children >= 0)
data_Wave5 <- subset(WV5_data, risktaking > 0 & gender > 0 & age > 0 & education > 0 & employed > 0 & married > 0 & children >= 0)
data_Wave6 <- subset(WV6_data, risktaking > 0 & gender > 0 & age > 0 & education > 0 & employed > 0 & married > 0 & children >= 0)
# Drop any remaining missing values
WVS_data <- na.omit(WVS_data)
data_Wave5 <- na.omit(data_Wave5)
data_Wave6 <- na.omit(data_Wave6)
# Harmonise the country name with the GPS data set
WVS_data <- WVS_data %>%
  mutate(country = ifelse(country == "Great Britain", "United Kingdom", country))
# Reverse-code the risk item so that high values represent more risk taking
# (original 1..6 scale; 6 - x + 1 simplifies to 7 - x)
WVS_data$risktaking <- 7 - WVS_data$risktaking
# Transform into a T score (mean = 50, sd = 10); as.numeric() drops the
# 1-column matrix with attributes that scale() returns
WVS_data$T_score_risktaking <- as.numeric(10 * scale(WVS_data$risktaking, center = TRUE, scale = TRUE) + 50)
WVS_data
# Z score recovered from the T score (mean 50, sd 10 by construction)
WVS_data$Z_score_risktaking <- (WVS_data$T_score_risktaking - 50) / 10
print(WVS_data)
# Age z-standardised within each country; ungroup() so later code does not
# accidentally operate on a still-grouped data frame
WVS_data <- WVS_data %>%
  group_by(country) %>%
  mutate(z_score_age = as.numeric(scale(age))) %>%
  ungroup()
WVS_data
# Dichotomise the covariates
WVS_data$gender <- ifelse(WVS_data$gender == 1, 0, 1)       # sex: 0 = male, 1 = female
WVS_data$children <- ifelse(WVS_data$children == 0, 0, 1)   # children: no vs. yes
WVS_data$married <- ifelse(WVS_data$married == 1, 1, 0)     # married: yes vs. no
WVS_data$employed <- ifelse(WVS_data$employed < 4, 1, 0)    # employed: yes vs. no
WVS_data$education <- ifelse(WVS_data$education < 4, 0, 1)  # education: no primary vs. primary+
# PREP THE DATASET FOR ANALYSIS GPS ####################
# Load the individual-level Global Preferences Survey (GPS) data
gps_data <- haven::read_dta("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/GPS_dataset_individual_level/individual_new.dta")
head(gps_data)
# Drop rows with missing values on the key analysis variables
gps_data <- gps_data %>%
  drop_na(country, isocode, risktaking, gender, age)
# Display the cleaned data
gps_data
# Keep only the variables of interest
gps_data <- gps_data %>%
  dplyr::select(country, isocode, ison, risktaking, gender, age)
gps_data
# Age z-standardised within each country; as.numeric() drops the matrix
# attributes scale() returns and ungroup() prevents a lingering grouping
gps_data <- gps_data %>%
  group_by(country) %>%
  mutate(z_score_age = as.numeric(scale(age))) %>%
  ungroup()
# Display the new column with z-scores per country
gps_data
# PREP THE DATASET FOR ANALYSIS HARDSHIP ####################
# Load the country-level hardship indicators (one row per country)
excel_path <- "/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/Hardship/Hardship_complete_2024.xlsx"
hardship <- read_excel(excel_path)
print(hardship)
# (The original code computed a 'hardship' mean of the RAW indicators here,
# which mixed incompatible scales and was overwritten below anyway; that
# premature computation, and an unused 'labels' vector, have been removed.)
# Log-transform the skewed indicators
hardship$mean_homicide <- log(hardship$mean_homicide)
hardship$gdp <- log(hardship$gdp)
hardship$Infant_mortality <- log(hardship$Infant_mortality)
hardship$life_expect <- log(hardship$life_expect)
hardship$gini_income <- log(hardship$gini_income)
hardship$primary_female_enrollment_rate <- log(hardship$primary_female_enrollment_rate)
# z-standardise; gdp and life expectancy are negated (reverse-coded) so that
# high values on every indicator mean more hardship
# NOTE(review): primary_female_enrollment_rate is NOT reverse-coded although
# higher enrollment plausibly means less hardship — confirm this is intended.
hardship$mean_homicide <- scale(hardship$mean_homicide)
hardship$gdp <- scale(-hardship$gdp)
hardship$Infant_mortality <- scale(hardship$Infant_mortality)
hardship$life_expect <- scale(-hardship$life_expect)
hardship$gini_income <- scale(hardship$gini_income)
hardship$primary_female_enrollment_rate <- scale(hardship$primary_female_enrollment_rate)
hardship
# Composite hardship index = mean of the six standardised indicators
hardship$hardship <- (hardship$mean_homicide + hardship$gdp + hardship$gini_income +
                        hardship$life_expect + hardship$Infant_mortality +
                        hardship$primary_female_enrollment_rate) / 6
hardship
# PREP THE DATASET FOR ANALYSIS MIXED-MODELS ####################
# Attach the country-level hardship index to the individual-level data.
# Left joins keep every survey row; countries without hardship data get NA.
WVS_mixed_model <- WVS_data %>%
  left_join(hardship, by = "country")
WVS_mixed_model
gps_mixed_model <- gps_data %>%
  left_join(hardship, by = "country")
gps_mixed_model
# DESCRIPTIVE INFORMATION #################### #################### ####################
# Country-level summary: sample size, share of women, age statistics and
# mean (standardised) risk taking.
table_data_WVS <- WVS_data %>%
  group_by(country) %>%
  summarise(
    n = n(),
    female_percentage = 100 * mean(gender == 1),
    mean_age = mean(age, na.rm = TRUE),
    age_range = paste(min(age, na.rm = TRUE), "-", max(age, na.rm = TRUE)),
    mean_risktaking = mean(Z_score_risktaking, na.rm = TRUE)
  )
table_data_WVS
# Number of respondents per country
country_counts <- WVS_data %>%
  count(country)
print(country_counts)
# Distribution of the hardship index.
# NOTE(review): 'hardship' holds one row per country, so sd/min/max per
# country operate on a single value (sd is NA) — confirm this is intended.
hardship_index_distribution <- hardship %>%
  group_by(country) %>%
  summarize(
    mean = mean(hardship, na.rm = TRUE),
    median = median(hardship, na.rm = TRUE),
    sd = sd(hardship, na.rm = TRUE),
    min = min(hardship, na.rm = TRUE),
    max = max(hardship, na.rm = TRUE),
    n = sum(!is.na(hardship))
  )
print(hardship_index_distribution)
# VISUALIZATION #################### #################### ####################
# World map highlighting the countries present in the WVS data
world_map <- map_data("world")
recorded_countries <- unique(WVS_data$country)
# BUG FIX: map_data("world") uses the region names "UK", "USA" and "Vietnam",
# so "United Kingdom", "United States" and "Viet Nam" never matched and were
# drawn as "Not Recorded". Translate before the lookup.
# NOTE(review): other names (e.g. "Trinidad and Tobago") may still mismatch
# the map's region naming — verify visually.
map_names <- dplyr::recode(recorded_countries,
  "United Kingdom" = "UK",
  "United States" = "USA",
  "Viet Nam" = "Vietnam"
)
world_map$recorded <- ifelse(world_map$region %in% map_names, "Recorded", "Not Recorded")
ggplot(world_map, aes(x = long, y = lat, group = group, fill = recorded)) +
  geom_polygon(color = "white") +
  scale_fill_manual(values = c("Recorded" = "red", "Not Recorded" = "lightgrey"), guide = "none") +
  theme_void() +
  labs(title = "WVS", fill = "Status") +
  theme(legend.position = "none", plot.title = element_text(hjust = 0.5))
# Risk taking vs. age with colour-coded gender.
# The age z-score is mapped back to an age-like axis via 15 * z + 42
# (assumes mean age ~42 and sd ~15 — TODO confirm against the data).
WVS_data$z_score_age_scaled <- 15 * WVS_data$z_score_age + 42
ggplot(WVS_data, aes(z_score_age_scaled, Z_score_risktaking, color = as.factor(gender))) +
  geom_point(position = position_jitter(width = 0.1, height = 0.1), size = 0.1) +
  geom_smooth(method = "lm") +
  geom_vline(xintercept = 42, linetype = "dashed", color = "black", size = 1) +
  scale_color_manual(values = c("blue", "red"), labels = c("Male", "Female")) +
  labs(color = "Gender") +
  xlab("Age") +
  ylab("Risk Taking") +
  scale_x_continuous(breaks = seq(0, 100, by = 15), limits = c(0, 100)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
WVS_data
# Per-country regressions of standardised risk taking on standardised age and
# gender. do() fits one lm per country; the coefficient rows are
# [1] intercept, [2] scale(age), [3] gender. (do() is superseded in newer
# dplyr but kept here as-is.)
regression_results_WVS <- WVS_data %>%
group_by(country) %>%
do(model = lm(Z_score_risktaking ~ scale(age) + gender, data = .)) %>%
summarize(
country = first(country),
intercept = coef(summary(model))[1, 1],
slope_age = coef(summary(model))[2, 1],
slope_gender = coef(summary(model))[3, 1]
)
regression_results_WVS
# Same per-country regressions for the GPS data; z_score_age was computed
# within country above, so it plays the same role as scale(age) per group
regression_results_gps <- gps_data %>%
group_by(country) %>%
do(model = lm(risktaking ~ z_score_age + gender, data = .)) %>%
summarize(
country = first(country),
intercept = coef(summary(model))[1, 1],
slope_age = coef(summary(model))[2, 1],
slope_gender = coef(summary(model))[3, 1]
)
regression_results_gps
# Countries that appear in both the WVS and the GPS data
common_countries <- intersect(WVS_data$country, gps_data$country)
common_countries
# Pasted console output of common_countries — commented out so the script parses:
# [1] "Argentina" "Australia" "Brazil" "Canada" "Chile" "China" "Egypt"
# [8] "Finland" "France" "Georgia" "Germany" "Ghana" "Hungary" "India"
# [15] "Indonesia" "Iran" "Japan" "Mexico" "Moldova" "Morocco" "Netherlands"
# [22] "Peru" "Poland" "Romania" "Russia" "Rwanda" "South Africa" "South Korea"
# [29] "Spain" "Sweden" "Switzerland" "Thailand" "Turkey" "Ukraine" "United Kingdom"
# [36] "Algeria" "Colombia" "Estonia" "Haiti" "Iraq" "Jordan" "Kazakhstan"
# [43] "Nigeria" "Pakistan" "Philippines" "United States" "Zimbabwe"
# Countries that are in both datasets
common_countries <- intersect(WVS_data$country, gps_data$country)
# Filter both original datasets down to the shared countries
new_gps <- gps_data[gps_data$country %in% common_countries, ]
# BUG FIX: new_WVS was used below but never created anywhere in the script
new_WVS <- WVS_data[WVS_data$country %in% common_countries, ]
# View the new dataset
new_gps
# Per-country regressions on the restricted WVS data (coefficient rows:
# [1] intercept, [2] scale(age), [3] gender)
regression_results_WVS_new <- new_WVS %>%
  group_by(country) %>%
  do(model = lm(Z_score_risktaking ~ scale(age) + gender, data = .)) %>%
  summarize(
    country = first(country),
    intercept_WVS = coef(summary(model))[1, 1],
    slope_age_WVS = coef(summary(model))[2, 1],
    slope_gender_WVS = coef(summary(model))[3, 1]
  )
regression_results_WVS_new
# Per-country regressions on the restricted GPS data. Inside do() the data
# is already split by country, so scale(age) standardises within country.
regression_results_gps_new <- new_gps %>%
group_by(country) %>%
do(model = lm(risktaking ~ scale(age) + gender, data = .)) %>%
summarize(
country = first(country),
intercept_gps = coef(summary(model))[1, 1],
slope_age_gps = coef(summary(model))[2, 1],
slope_gender_gps = coef(summary(model))[3, 1]
)
regression_results_gps_new
# Merging regression results with additional data and hardship index
regression_results_gps_new
regression_results_WVS_new
# "country" is the common key; all = TRUE performs a full outer join so
# countries present in only one result table are kept (with NAs)
merged_results <- merge(regression_results_gps_new, regression_results_WVS_new, by = "country", all = TRUE)
# Read the hardship workbook again (raw, untransformed indicator values);
# here it is only used to pull in the ISO codes for plot labels
new_data <- read_excel("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/Hardship/Hardship_complete_2024.xlsx")
print(new_data)
# Attach the hardship columns to the merged regression results by country
new_data <- left_join(merged_results, new_data, by = "country")
# Keep the regression coefficients plus the ISO code used for labelling
new_data <- new_data %>%
dplyr::select(country, intercept_gps, slope_age_gps, slope_gender_gps, intercept_WVS, slope_age_WVS, slope_gender_WVS, isocode)
# Print the final data
print(new_data)
# Build hardship_data_complete: per-country regression results joined with
# the (freshly read) hardship indicators, then the same log / reverse-code /
# z-standardise pipeline as for 'hardship' above.
hardship_index <- read_excel("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/Hardship/Hardship_complete_2024.xlsx")
print(hardship_index)
hardship_data_complete <- left_join(regression_results_WVS, hardship_index, by = "country")
hardship_data_complete
# Log-transform the skewed indicators
hardship_data_complete$mean_homicide <- log(hardship_data_complete$mean_homicide)
hardship_data_complete$gdp <- log(hardship_data_complete$gdp)
hardship_data_complete$Infant_mortality <- log(hardship_data_complete$Infant_mortality)
hardship_data_complete$life_expect <- log(hardship_data_complete$life_expect)
hardship_data_complete$gini_income <- log(hardship_data_complete$gini_income)
hardship_data_complete$primary_female_enrollment_rate <- log(hardship_data_complete$primary_female_enrollment_rate)
# z-standardise; gdp and life expectancy reverse-coded (negated) so high
# values consistently mean more hardship
hardship_data_complete$mean_homicide <- scale(hardship_data_complete$mean_homicide)
hardship_data_complete$gdp <- scale(-hardship_data_complete$gdp)
hardship_data_complete$Infant_mortality <- scale(hardship_data_complete$Infant_mortality)
hardship_data_complete$life_expect <- scale(-hardship_data_complete$life_expect)
hardship_data_complete$gini_income <- scale(hardship_data_complete$gini_income)
hardship_data_complete$primary_female_enrollment_rate <- scale(hardship_data_complete$primary_female_enrollment_rate)
hardship_data_complete
# Composite hardship index = mean of the six standardised indicators
hardship_data_complete$hardship <- (hardship_data_complete$mean_homicide + hardship_data_complete$gdp +
                                      hardship_data_complete$gini_income + hardship_data_complete$life_expect +
                                      hardship_data_complete$Infant_mortality +
                                      hardship_data_complete$primary_female_enrollment_rate) / 6
hardship_data_complete
# Compare country intercepts (baseline risk taking) between GPS and WVS
ggplot(new_data, aes(x = intercept_gps, y = intercept_WVS, label = isocode)) +
  geom_point(size = 1.5) +
  geom_text(aes(label = isocode), vjust = -0.5, hjust = -0.5) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(title = "Comparison of Intercept Values",
       x = "Intercept_gps",
       y = "Intercept_WVS") +
  theme_minimal() +
  xlim(c(0, 1.2)) +
  ylim(c(0, 1.2)) +
  coord_fixed()
# Same comparison with non-overlapping labels (ggrepel)
ggplot(new_data, aes(x = intercept_gps, y = intercept_WVS, label = isocode)) +
  geom_point(size = 3) +
  geom_text_repel(
    aes(label = isocode),
    box.padding = 0.5,
    point.padding = 0.1,
    force = 5
  ) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(x = "Intercept Global Preference Study",
       y = "Intercept World Value Survey") +
  theme_minimal() +
  xlim(c(0, 1.07)) +
  ylim(c(0, 1.07)) +
  coord_fixed()
# Annotate with a word cloud of the ISO codes
# (FIX: the identical wordcloud() call was issued twice; the duplicate was removed)
wordcloud(words = new_data$isocode, freq = rep(1, nrow(new_data)), scale = c(2, 0.5))
# Regress WVS intercepts on GPS intercepts across countries
model <- lm(intercept_WVS ~ intercept_gps, data = merged_results)
summary(model)
# Pasted console output of summary(model) — commented out so the script parses:
# Call: lm(formula = intercept_WVS ~ intercept_gps, data = merged_results)
# Residuals: Min -0.64518, 1Q -0.13560, Median -0.00272, 3Q 0.16597, Max 0.70522
# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)
# (Intercept)    0.27537    0.04179   6.590 4.14e-08 ***
# intercept_gps  0.48328    0.13549   3.567 0.000871 ***
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual standard error: 0.266 on 45 degrees of freedom
# Multiple R-squared: 0.2204, Adjusted R-squared: 0.2031
# F-statistic: 12.72 on 1 and 45 DF, p-value: 0.0008713
# Correlation between GPS and WVS country intercepts.
# NOTE(review): merge() used all = TRUE, so missing intercepts would make
# cor() return NA; consider use = "complete.obs" — confirm before changing.
correlation <- cor(merged_results$intercept_gps, merged_results$intercept_WVS)
# Print the correlation coefficient
print(correlation)
# [1] 0.469472
# Comparison of the effect of age on risk taking between GPS and WVS data
# (per-country age slopes from the two regression tables, labelled by ISO code)
ggplot(new_data, aes(x = slope_age_gps, y = slope_age_WVS, label = isocode)) +
geom_point(size = 3) +
geom_text_repel(
aes(label = isocode),
box.padding = 0.5,
point.padding = 0.1,
force = 5
) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
labs(x = "Effect of Age on Risk Taking Global Preference Study",
y = "Effect of Age on Risk Taking World Value Survey") +
theme_minimal() +
xlim(-0.5, 0) +
ylim(c(-0.5, 0)) +
coord_fixed()
new_data
# Correlation of the per-country age slopes across the two surveys
correlation <- cor(new_data$slope_age_gps, new_data$slope_age_WVS)
# Print the correlation coefficient
print(correlation)
# [1] 0.4154239
# Comparison of the effect of gender on risk taking between GPS and WVS data
# (per-country gender slopes, labelled by ISO code)
ggplot(new_data, aes(x = slope_gender_gps, y = slope_gender_WVS, label = isocode)) +
geom_point(size = 3) +
geom_text_repel(
aes(label = isocode),
box.padding = 0.5,
point.padding = 0.1,
force = 5
) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
labs(x = "Effect of Gender on Risk Taking Global Preference Study",
y = "Effect of Gender on Risk Taking World Value Survey") +
theme_minimal() +
xlim(-0.6, 0.12) +
ylim(c(-0.6, 0.12)) +
coord_fixed()
new_data
# Correlation of the per-country gender slopes across the two surveys
correlation <- cor(new_data$slope_gender_gps, new_data$slope_gender_WVS)
# Print the correlation coefficient
print(correlation)
# [1] 0.2821009
# Calculation of a hardship index for the WVS regression results.
# BUG FIX: file name case corrected to "Hardship_complete_2024.xlsx" (it was
# lowercase "hardship_…"), matching every other read of this file; the
# lowercase name only worked on case-insensitive file systems (macOS default).
# The unused 'labels' vector was removed.
hardship_values <- read_excel("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/Hardship/Hardship_complete_2024.xlsx")
print(hardship_values)
# z-standardise the indicator columns.
# NOTE(review): unlike the 'hardship' pipeline above, the indicators here are
# neither log-transformed nor reverse-coded — confirm this difference is intended.
hardship_values$gdp <- scale(hardship_values$gdp)
hardship_values$gini_income <- scale(hardship_values$gini_income)
hardship_values$Infant_mortality <- scale(hardship_values$Infant_mortality)
hardship_values$life_expect <- scale(hardship_values$life_expect)
hardship_values$mean_homicide <- scale(hardship_values$mean_homicide)
hardship_values$primary_female_enrollment_rate <- scale(hardship_values$primary_female_enrollment_rate)
hardship_values
# Composite index = mean of the six standardised indicators
hardship_values <- hardship_values %>%
  mutate(hardship = (mean_homicide + gdp + Infant_mortality + life_expect + gini_income + primary_female_enrollment_rate) / 6)
hardship_values
# Join onto the per-country WVS regression results
hardship_data_complete <- left_join(regression_results_WVS, hardship_values, by = "country")
hardship_data_complete
# Relationship between hardship and risk taking for the WVS data.
# Hardship vs. country-level risk taking (the regression intercept)
ggplot(hardship_data_complete, aes(x = hardship, y = intercept, label = isocode)) +
geom_point(size = 3) +
geom_text_repel(
aes(label = isocode),
box.padding = 0.5,
point.padding = 0.1,
force = 5
) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
labs(x = "Hardship",
y = "Risk Taking") +
theme_minimal() +
xlim(-2, 2) +
ylim(-0.6, 1) +
coord_fixed(ratio = 2.5)
# Hardship vs. the per-country age effect on risk taking
ggplot(hardship_data_complete, aes(x = hardship, y = slope_age, label = isocode)) +
geom_point(size = 3) +
geom_text_repel(
aes(label = isocode),
box.padding = 0.5,
point.padding = 0.1,
force = 5
) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
labs(x = "Hardship",
y = "Age Effect") +
theme_minimal() +
xlim(-2, 2) +
coord_fixed(ratio = 8)
# Same intercept plot without fixed axis limits
ggplot(hardship_data_complete, aes(x = hardship, y = intercept, label = isocode)) +
geom_point(size = 3) +
geom_text_repel(
aes(label = isocode),
box.padding = 0.5,
point.padding = 0.1,
force = 5
) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
labs(x = "Hardship",
y = "Risk Taking") +
theme_minimal() +
coord_fixed()
# Linear model: hardship predicted by country-level risk taking (intercept)
model_intercept <- lm(hardship ~ intercept, data = hardship_data_complete)
summary_intercept <- summary(model_intercept)
print(summary_intercept)
# Pasted console output of summary(model_intercept) — commented out so the script parses:
# Call: lm(formula = hardship ~ intercept, data = hardship_data_complete)
# Residuals: Min -0.6388, 1Q -0.2009, Median -0.0141, 3Q 0.1770, Max 1.0821
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept) -0.05232    0.06421  -0.815    0.418
# intercept    0.12196    0.13847   0.881    0.381
# Residual standard error: 0.3426 on 74 degrees of freedom
# Multiple R-squared: 0.01037, Adjusted R-squared: -0.002999
# F-statistic: 0.7758 on 1 and 74 DF, p-value: 0.3813
# Correlation between hardship and country-level risk taking (the intercept)
correlation <- cor(hardship_data_complete$hardship, hardship_data_complete$intercept)
print(paste("Correlation Hardship und Risk Taking:", correlation))
# [1] "Correlation Hardship und Risk Taking: 0.101856818032701"
# Linear model: hardship predicted by the per-country age effect (slope_age)
model_intercept <- lm(hardship ~ slope_age, data = hardship_data_complete)
summary_intercept <- summary(model_intercept)
print(summary_intercept)
# Pasted console output of summary(model_intercept) — commented out so the script parses:
# Call: lm(formula = hardship ~ slope_age, data = hardship_data_complete)
# Residuals: Min -0.6855, 1Q -0.1981, Median -0.0007, 3Q 0.1342, Max 1.1061
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept) -0.03575    0.09073  -0.394    0.695
# slope_age   -0.14672    0.42568  -0.345    0.731
# Residual standard error: 0.3441 on 74 degrees of freedom
# Multiple R-squared: 0.001603, Adjusted R-squared: -0.01189
# F-statistic: 0.1188 on 1 and 74 DF, p-value: 0.7313
# Correlation between hardship and the per-country age effect on risk taking
correlation <- cor(hardship_data_complete$hardship, hardship_data_complete$slope_age)
# BUG FIX: the printed label said "Risk Taking" although this is the age slope
print(paste("Correlation Hardship and Age Effect:", correlation))
# [1] "Correlation Hardship und Risk Taking: -0.0400350639522652"
# Linear model: hardship predicted by the per-country gender effect (slope_gender)
model_intercept <- lm(hardship ~ slope_gender, data = hardship_data_complete)
summary_intercept <- summary(model_intercept)
print(summary_intercept)
# Pasted console output of summary(model_intercept) — commented out so the script parses:
# Call: lm(formula = hardship ~ slope_gender, data = hardship_data_complete)
# Residuals: Min -0.55821, 1Q -0.18400, Median -0.01724, 3Q 0.13650, Max 0.99214
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept)   -0.1881     0.0926  -2.031   0.0458 *
# slope_gender  -0.7764     0.3626  -2.141   0.0355 *
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual standard error: 0.3342 on 74 degrees of freedom
# Multiple R-squared: 0.05834, Adjusted R-squared: 0.04562
# F-statistic: 4.585 on 1 and 74 DF, p-value: 0.03555
# Correlation between hardship and the per-country gender effect on risk taking
correlation <- cor(hardship_data_complete$hardship, hardship_data_complete$slope_gender)
# BUG FIX: the printed label said "Risk Taking" although this is the gender slope
print(paste("Correlation Hardship and Gender Effect:", correlation))
# [1] "Correlation Hardship und Risk Taking: -0.241543858957759"
WVS_data
# Linear model: standardised risk taking on within-country age z-score and gender
model_risk_age <- lm(Z_score_risktaking ~ z_score_age + factor(gender), data = WVS_data)
summary_risk_age <- summary(model_risk_age)
print(summary_risk_age)
# Pasted console output of summary(model_risk_age) — commented out so the script parses:
# Call: lm(formula = Z_score_risktaking ~ z_score_age + factor(gender), data = WVS_data)
# Residuals: Min -1.9043, 1Q -0.8023, Median -0.1176, 3Q 0.7837, Max 2.6654
# Coefficients:
#                  Estimate Std. Error t value Pr(>|t|)
# (Intercept)      0.131135   0.003656   35.87   <2e-16 ***
# z_score_age     -0.184692   0.002531  -72.98   <2e-16 ***
# factor(gender)2 -0.251665   0.005064  -49.69   <2e-16 ***
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual standard error: 0.975 on 148524 degrees of freedom
# Multiple R-squared: 0.04944, Adjusted R-squared: 0.04942
# F-statistic: 3862 on 2 and 148524 DF, p-value: < 2.2e-16
# Correlation between age and risk taking, computed separately per gender
correlation <- by(WVS_data, WVS_data$gender, function(x) cor(x$z_score_age, x$Z_score_risktaking))
# BUG FIX: the printed label said "Hardship" although this is age vs. risk taking by gender
print(paste("Correlation Age and Risk Taking by gender:", correlation))
# [1] "Correlation Hardship und Risk Taking: -0.200207985711739" "Correlation Hardship und Risk Taking: -0.172610623925891"
new_data
# Linear model: GPS gender effect predicted by WVS gender effect (per country)
model <- lm(slope_gender_gps ~ slope_gender_WVS, data = new_data)
summary_model <- summary(model)
print(summary_model)
# Pasted console output of summary(model) — commented out so the script parses:
# Call: lm(formula = slope_gender_gps ~ slope_gender_WVS, data = new_data)
# Residuals: Min -0.37268, 1Q -0.06468, Median -0.02119, 3Q 0.09295, Max 0.31794
# Coefficients:
#                  Estimate Std. Error t value Pr(>|t|)
# (Intercept)      -0.13779    0.04304  -3.202  0.00251 **
# slope_gender_WVS  0.33943    0.17208   1.973  0.05472 .
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual standard error: 0.1248 on 45 degrees of freedom
# Multiple R-squared: 0.07958, Adjusted R-squared: 0.05913
# F-statistic: 3.891 on 1 and 45 DF, p-value: 0.05472
# Correlation of the per-country age effects between GPS and WVS
correlation <- cor(new_data$slope_age_gps, new_data$slope_age_WVS)
# BUG FIX: the printed label said "Hardship und Risk Taking" although this is
# the GPS-vs-WVS age-effect correlation
print(paste("Correlation of Age Effects (GPS vs. WVS):", correlation))
# [1] "Correlation Hardship und Risk Taking: 0.415423889806963"
# Hardship vs. the per-country gender effect on risk taking, labelled by ISO code
ggplot(hardship_data_complete, aes(x = hardship, y = slope_gender, label = isocode)) +
geom_point(size = 3) +
geom_text_repel(
aes(label = isocode),
box.padding = 0.5,
point.padding = 0.1,
force = 5
) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
labs(x = "Hardship",
y = "Gender Effect") +
theme_minimal() +
xlim(-2, 2) +
coord_fixed(ratio = 6)